# NOTE(review): rm(list = ls()) removes every (non-hidden) object from the
# environment the script is run in; it does not detach packages or reset
# options, so it does not give a truly clean session.
rm(list=ls())
# tidyverse supplies the pipe, the dplyr verbs, purrr::map() and
# readr::write_rds() used below; readxl supplies read_excel().
library(tidyverse)
library(readxl)

# Read and structure municipal population data from 2000 to 2019

#' Read one yearly sheet of a population workbook
#'
#' Reads the sheet named after `y` from the Excel file `tab`, skipping the
#' first three rows and treating "..." as missing. Rows without a population
#' value are dropped and the municipality code is cut to its first six
#' characters.
#'
#' @param y Year to read; also used (as text) for the sheet name.
#' @param tab Path to the Excel workbook.
#' @return A tibble with columns `mun_code`, `mun_name`, `year`, `mun_pop`.
read_pop <- function(y, tab) {
  sheet_name <- paste0(y)

  sheet_rows <- read_excel(
    tab,
    sheet = sheet_name,
    skip = 3,
    col_names = c("mun_code", "mun_name", "var_desc", "mun_pop"),
    col_types = c("text", "text", "text", "numeric"),
    na = "..."
  )

  # Keep only rows that actually carry a population figure
  populated <- filter(sheet_rows, !is.na(mun_pop))

  populated %>%
    mutate(
      mun_code = substr(mun_code, 1, 6),
      year = y
    ) %>%
    select(mun_code, mun_name, year, mun_pop)
}

# Locate the workbook whose file name matches "tab6579" in data/raw/IBGE
files_2001_2019 <- list.files(
  here::here("data", "raw", "IBGE"),
  pattern = "tab6579",
  full.names = TRUE
)

# read_excel() takes exactly one path, so stop with an explicit message when
# the pattern matches no file or more than one.
if (length(files_2001_2019) != 1L) {
  stop(
    "Expected exactly one file matching 'tab6579' in data/raw/IBGE, found ",
    length(files_2001_2019), ".",
    call. = FALSE
  )
}

# Read one sheet per year with read_pop() and stack the results.
# 2007 and 2010 are not read here; they come from other workbooks below.
pop_2001_to_2019 <- map(
  c(2001:2006, 2008:2009, 2011:2019),
  read_pop,
  tab = files_2001_2019
) %>%
  bind_rows()

# Read and structure the files for 2000, 2007 and 2010

# 2007: workbook whose file name matches "tab793"
file_2007 <- list.files(
  here::here("data", "raw", "IBGE"),
  pattern = "tab793",
  full.names = TRUE
)

# read_excel() takes exactly one path, so stop with an explicit message when
# the pattern matches no file or more than one.
if (length(file_2007) != 1L) {
  stop(
    "Expected exactly one file matching 'tab793' in data/raw/IBGE, found ",
    length(file_2007), ".",
    call. = FALSE
  )
}

# Here the year is read from the sheet's third column (as text) and converted
# to numeric, rather than being supplied by the caller.
pop_2007 <- read_excel(
  file_2007,
  sheet = "Tabela",
  skip = 3,
  col_names = c("mun_code", "mun_name", "year", "mun_pop"),
  col_types = c(rep("text", 3), "numeric"),
  na = "..."
) %>%
  filter(!is.na(mun_pop)) %>%
  mutate(
    year = as.numeric(year),
    mun_code = substring(mun_code, 1, 6)
  ) %>%
  select(mun_code, mun_name, year, mun_pop)

# 2000 and 2010: both years are sheets of the workbook matching "tab202"
file_2000 <- list.files(
  here::here("data", "raw", "IBGE"),
  pattern = "tab202",
  full.names = TRUE
)

# read_excel() takes exactly one path, so stop with an explicit message when
# the pattern matches no file or more than one.
if (length(file_2000) != 1L) {
  stop(
    "Expected exactly one file matching 'tab202' in data/raw/IBGE, found ",
    length(file_2000), ".",
    call. = FALSE
  )
}

# Same directory and pattern as file_2000, so reuse the result instead of
# listing the directory a second time.
file_2010 <- file_2000

#' Read one yearly sheet of the "tab202" workbook
#'
#' Reads the sheet named after `y`, skipping the first three rows and treating
#' "..." as missing, drops rows without a population value and cuts the
#' municipality code to its first six characters.
#'
#' NOTE(review): the sheet has `sex` and `situation` columns that are dropped
#' without filtering. This assumes the sheet holds only one row per
#' municipality (totals) - confirm, otherwise municipalities are duplicated.
#'
#' @param tab Path to the Excel workbook.
#' @param y Year to read; also used (as text) for the sheet name.
#' @return A tibble with columns `mun_code`, `mun_name`, `year`, `mun_pop`.
read_pop_tab202 <- function(tab, y) {
  read_excel(
    tab,
    sheet = paste0(y),
    skip = 3,
    col_names = c(
      "mun_code", "mun_name", "var_desc", "sex", "situation", "mun_pop"
    ),
    col_types = c(rep("text", 5), "numeric"),
    na = "..."
  ) %>%
    filter(!is.na(mun_pop)) %>%
    mutate(
      year = y,
      mun_code = substring(mun_code, 1, 6)
    ) %>%
    select(mun_code, mun_name, year, mun_pop)
}

pop_2000 <- read_pop_tab202(file_2000, 2000)
pop_2010 <- read_pop_tab202(file_2010, 2010)

# Stack all yearly tables into one data frame, in the order listed
# (rows are not re-sorted by year).
pop_mun <- list(pop_2000, pop_2001_to_2019, pop_2007, pop_2010) %>%
  bind_rows()

# Save the combined table as an RDS file
write_rds(
  pop_mun,
  here::here("data", "processed", "citycharacteristics", "pop_mun.rds")
)